* ------------------------------------------------------------------------------
* Session set-up: start from an empty Stata session, close any log left open by
* a previous run, disable output paging, and move into the project folder.
* ------------------------------------------------------------------------------
clear all
* The original line was "cap close"; "close" is not a Stata command, so capture
* silently swallowed the error and nothing was closed. "log close" is what is
* meant; capture keeps it from failing when no log is open.
capture log close
set more off
* NOTE(review): -set memory- is ignored by Stata 12 and later (memory is managed
* automatically); kept only so the header matches the original script.
set mem 800m

* Relative path: the do-file must be launched from the parent of Data_Program.
cd ".\Data_Program"

* ==============================================================================
* Merge call report (id:rssd) with HMDA (id:agency-respondentid)
* ==============================================================================

  * Avery HMDA lender match file: keep institutions with a non-missing TYPE08
  * (per the original note, these are the 2008 filers)
use Econdata\hmdpanel17, clear
drop if TYPE08 == .

  * Keep the identifiers only and give them the names used in the HMDA LAR merge
keep hmprid code NAME08 RSSD08
rename (NAME08 hmprid code RSSD08) (respondentname respondentid agencycode idrssd)

  * Attach call-report variables by RSSD id.
  * Call-report rows without an HMDA counterpart (using-only) are discarded.
merge m:1 idrssd using call, keep(master match)

  * _merge is not in this list, so it is dropped here as well
keep respondentname agencycode respondentid idrssd callname dcr* cr* survivor dmsr msr08 msr15 banktype tota

  * bank_dummy = 1 when survivor is non-missing, 0 otherwise.
  * Per the original note: flags banks present in the 2008 call report
  * (about 50% of lenders).
gen bank_dummy = !(survivor == .)

sort bank_dummy survivor idrssd
save call_hmda08, replace


* ==============================================================================
* Construct 2008 lending-share-weighted regulation shocks at county level
* ==============================================================================

 * 2008 public HMDA loan/application register (LAR)
use Econdata\hmda_data\lar2008, clear

 * Harmonise the lender keys with those stored in call_hmda08
rename (agency_code respondent_id) (agencycode respondentid)

 * Keep only LAR records whose lender is found in call_hmda08
merge m:1 agencycode respondentid using call_hmda08, keep(match)
keep loan_amount county respondentname idrssd callname dcr* cr* bank_dummy


* ---- Lender-by-county lending volume (2008) ----
* Records without a county code cannot be assigned to a county
keep if county != .

* Total loan amount per (county, lender), then collapse to one row per pair
bysort county idrssd: egen lenderamt = total(loan_amount)
by county idrssd: keep if _n == 1
drop loan_amount

* Flag the four RSSD ids the original labels BoA, Citibank, JP Morgan, WF
gen big4 = inlist(idrssd, 480228, 476810, 852218, 451965)

* County totals: all lenders, the big four, and call-report banks
bysort county: egen countyamt = total(lenderamt)
by county: egen big4amt = total(big4*lenderamt)
by county: egen bankamt = total(bank_dummy*lenderamt)

* Shares of county lending
gen lendershare = lenderamt/countyamt
gen big4share08 = big4amt/countyamt
gen bankshare08 = bankamt/countyamt

* Diagnostic only: number of counties each big-four lender is active in
bysort idrssd (county): gen ncounty = _N
tab ncounty if big4 == 1

* ---- Lending-share-weighted regulation shocks ----
* egen total() counts a missing product as 0, so lenders without dcr/cr08
* contribute nothing to the county aggregate.
bysort county: egen dcr_wt = total(lendershare*dcr)
by county: egen cr08_wt = total(lendershare*cr08)
by county: keep if _n == 1

* One row per county
keep county dcr_wt cr08_wt bankshare08 big4share08
save regshock, replace


* ==============================================================================
* Construct county-level variables
* ==============================================================================

use Econdata\hmda_data\county, clear
keep if inlist(year, 2008, 2015)

* ---- Add county covariates ----
* For every source except the first, master rows are always retained and
* using-only rows are discarded.

    * division file (supplies state info); only matched counties survive
    * (per the original note, the unmatched ones are PR and AK)
merge m:1 county using Econdata\countytodivision, keep(match)
drop _merge

    * advertising variable finad; counties absent from the file get 0
merge 1:1 year county using Proprietary\mintel_annual, keep(master match)
drop _merge
replace finad = 0 if finad == .

    * house price index; the shipped growth rate is discarded and rebuilt below
merge 1:1 year county using Proprietary\hpi_county_year_exp, keep(master match)
drop _merge
drop hpgr

    * population; the shipped growth rate and fracm are discarded
merge 1:1 year county using Econdata\pop_county, keep(master match)
drop _merge
drop popgr fracm

    * employment
merge 1:1 year county using Econdata\emp_temp.dta, keep(master match)
drop _merge

    * subprime share (CCP, per the original note)
merge 1:1 year county using Proprietary\subprime_county_year, keep(master match)
drop _merge

    * education (2010 levels, per the original note); hsabove is not needed
merge m:1 county using Econdata\edu, keep(master match)
drop _merge
drop hsabove

    * migration
merge 1:1 year county using Econdata\migration, keep(master match)
drop _merge


* ---- Keep counties observed in more than one of the retained years ----
bysort county (year): gen rep = _N
keep if rep != 1


* ---- Outcomes: change relative to the county's previous row ----
foreach v in finshare finshare_refi finshare_pur {
    by county: gen d`v' = `v' - `v'[_n-1]
}

* ---- Controls: growth rates, then values taken from the previous row ----
by county: gen hpgr  = ahpi/ahpi[_n-1] - 1
by county: gen popgr = pop/pop[_n-1] - 1
by county: gen empgr = emp/emp[_n-1] - 1
by county: gen dfinad = log(finad+1) - log(finad[_n-1]+1)
by county: gen subprime08 = subprime[_n-1]
by county: gen fracb08    = fracb[_n-1]
by county: gen fracy08    = fracy[_n-1]
by county: gen edu08      = babove[_n-1]

* ---- Weights, also taken from the previous row ----
by county: gen pop08 = pop[_n-1]
by county: gen tot08 = tot[_n-1]

* The first row of each county has no predecessor, so its change is missing.
* Dropping missing changes leaves a single cross-section.
keep if dfinshare != .

* ---- Attach the county-level regulation shocks ----
merge 1:1 county using regshock, keep(match)
drop _merge

keep county state division region tot08 pop08 dfinshare* dcr_wt cr08_wt bankshare08 big4share08 hpgr popgr empgr dfinad fracb08 fracy08 subprime08 edu08

* Strip the 08 suffix from every variable that ends in it, then restore it on
* big4share, which keeps its suffixed name in the saved file.
rename *08 *
rename big4share big4share08
save county_var, replace


* ==============================================================================
* Match counties with their neighbors' variables
* ==============================================================================

* ------------------------------------------------------------------------------
* For each county (county1), build sci-weighted averages of its neighbours'
* (county2) outcomes, shocks and controls, once over all retained neighbours and
* once over neighbours outside county1's commuting zone. Then attach county1's
* own variables and save beforereg_sci.
* ------------------------------------------------------------------------------
use Econdata\sci,clear
drop if county1==county2   // a county is not its own neighbour

* Leading digits of the county code, used as the state code.
* NOTE(review): assumes county codes are numeric 5-digit FIPS - confirm.
gen st1=floor(county1/1000)
gen st2=floor(county2/1000)
* Drop pairs where either side has state code 2, 15 or above 56
drop if inlist(st1,2,15)|st1>56
drop if inlist(st2,2,15)|st2>56

* I. Commuting zone (cz) of each side of the pair
rename county1 county
merge m:1 county using Econdata\cz_county_crosswalk
drop if _merge==2
drop _merge
rename county county1
rename cz cz1

rename county2 county
merge m:1 county using Econdata\cz_county_crosswalk
drop if _merge==2
drop _merge
rename county county2
rename cz cz2

* outcz = 0 only when both cz codes are known and equal; 1 otherwise
* (a pair with an unknown cz on either side therefore counts as out-of-CZ)
gen outcz=1-(cz1==cz2 & cz1!=. & cz2!=.)     
drop cz2 

* Keep, for each county1, its 200 highest-sci neighbours.
* NOTE(review): ties in sci at the cut-off are broken arbitrarily by gsort.
gsort county1 -sci
by county1: keep if _n<=200

keep county1 county2 sci outcz cz1

* II. Merge with county 2's data (neighbours absent from county_var are dropped)
rename county2 county
merge m:1 county using county_var 
keep if _merge==3
drop _merge

* Weights over all retained neighbours: sci normalised to sum to 1 per county1
sort county1
by county1: egen sumwt=total(sci)
gen weight = sci/sumwt

* total() replaces the older egen sum() and computes the same thing. The
* -missing- option returns missing, instead of a fabricated 0, when every
* neighbour's contribution is missing.
* NOTE(review): a neighbour with a missing covariate still contributes 0 while
* its weight stays in the denominator - consider renormalising per variable.
by county1: egen dfinshare2    = total(weight*dfinshare), missing
by county1: egen bankshare2    = total(weight*bankshare), missing
by county1: egen dcr_wt2       = total(weight*dcr_wt), missing
by county1: egen cr08_wt2      = total(weight*cr08_wt), missing
by county1: egen big4share_wt2 = total(weight*big4share08), missing

by county1: egen hpgr2 = total(weight*hpgr), missing
by county1: egen popgr2 = total(weight*popgr), missing
by county1: egen fracy2 = total(weight*fracy), missing
by county1: egen fracb2 = total(weight*fracb), missing
by county1: egen empgr2 = total(weight*empgr), missing
by county1: egen subprime2 = total(weight*subprime), missing
by county1: egen edu2 = total(weight*edu), missing
by county1: egen dfinad2 = total(weight*dfinad), missing


* Weights over out-of-CZ neighbours only. sumwt, and hence weight, is missing
* on in-CZ rows, so those rows drop out of the totals below.
drop sumwt weight
by county1: egen sumwt=total(sci) if outcz==1
gen weight = sci/sumwt

* With -missing-, a county1 that has no out-of-CZ neighbour gets missing here
* rather than 0, so it cannot be mistaken for a genuine zero exposure.
by county1: egen dfinshare_outcz2    = total(weight*dfinshare), missing
by county1: egen dcr_wt_outcz2       = total(weight*dcr_wt), missing
by county1: egen cr08_wt_outcz2      = total(weight*cr08_wt), missing
by county1: egen big4share_wt_outcz2 = total(weight*big4share08), missing
* The aggregates are constant within county1: keep one row per county
by county1: keep if _n==1

* Retain the id, its cz, and every neighbour aggregate (names ending in 2)
keep county1 cz1 *2 

* III. Merge with county 1's own data
rename county1 county
merge 1:1 county using county_var
keep if _merge==3
drop _merge

* w is a copy of tot (the 08-suffixed tot08 before renaming in county_var)
gen w=tot
save beforereg_sci,replace   //3200 counties